{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 导入模块，以下代码中主要是loc数据筛选的使用，在数据分析中使用频率很高。\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the source workbook from the current working directory.\n",
    "# The path is assembled with os.path.join instead of a hard-coded backslash,\n",
    "# so it resolves to '.\\2024病案首页.xlsx' on Windows (as before) and to\n",
    "# './2024病案首页.xlsx' on systems where a backslash is not a path separator.\n",
    "input_path = os.path.join('.', '2024病案首页.xlsx')\n",
    "data = pd.read_excel(input_path)\n",
    "\n",
    "# Year label; the export cell uses it as the prefix of the output file name.\n",
    "year = '2024年'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one string per row from the unnumbered code column followed by the\n",
    "# numbered code columns 1..15, in that order. Each column is cast to str\n",
    "# before joining, and the pieces are joined without any separator.\n",
    "code_columns = ['疾病编码(JBDM)'] + [f'疾病编码{i}(JBDM{i})' for i in range(1, 16)]\n",
    "\n",
    "joined_codes = data[code_columns[0]].astype(str)\n",
    "for column in code_columns[1:]:\n",
    "    joined_codes = joined_codes + data[column].astype(str)\n",
    "\n",
    "data['疾病拼接'] = joined_codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the label column and fill every row with a placeholder value.\n",
    "# Rows keep this placeholder unless a later step assigns them a label.\n",
    "UNMARKED = '-'\n",
    "data['疾病标记'] = UNMARKED"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Label each row according to the codes found in its concatenated code string.\n",
    "# Every rule is (regular expression, label). Rules are applied from top to\n",
    "# bottom, so when a row matches several rules the LAST matching rule decides\n",
    "# the final label; keep the order when adding or moving entries.\n",
    "marking_rules = [\n",
    "    ('C50', '乳腺癌'),\n",
    "    ('C34', '肺癌'),\n",
    "    ('C22', '肝癌'),\n",
    "    # The dot is escaped on purpose: an unescaped '.' is a regex wildcard and\n",
    "    # would also match e.g. 'C2201', or 'C22' directly followed by 'I1...'.\n",
    "    # This rule must stay after the broader 'C22' rule so it can override it.\n",
    "    (r'C22\\.1', '肝内胆管癌'),\n",
    "    ('C53', '宫颈癌'),\n",
    "    ('C16', '胃癌'),\n",
    "    ('C18|C19|C20', '结直肠癌'),\n",
    "    ('C15', '食道癌'),\n",
    "    ('C73', '甲状腺癌'),\n",
    "    ('C61', '前列腺癌'),\n",
    "    ('C64', '肾癌'),\n",
    "    ('Z51', '恶性肿瘤其他治疗'),\n",
    "    ('Z85', '恶性肿瘤个人史'),\n",
    "    ('D45|D46|D47', '血液系统疾病'),\n",
    "]\n",
    "\n",
    "for pattern, label in marking_rules:\n",
    "    # Boolean mask of the rows whose code string matches this rule.\n",
    "    matches = data['疾病拼接'].str.contains(pattern, regex=True)\n",
    "    data.loc[matches, '疾病标记'] = label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the labelled table to an Excel workbook in the working directory.\n",
    "# The file name is prefixed with `year`. The path is assembled with\n",
    "# os.path.join rather than a hard-coded backslash, so the result is unchanged\n",
    "# on Windows and also valid where a backslash is not a path separator.\n",
    "output_path = os.path.join('.', f'{year}直报系统病案首页标记.xlsx')\n",
    "data.to_excel(output_path)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
